import scrapy
from scrapy import signals
from selenium import webdriver
from selenium.webdriver import FirefoxOptions
class JdSpider(scrapy.Spider):
    """Spider for jd.com pages, driven through a Selenium-controlled Firefox.

    The webdriver instance is attached in :meth:`from_crawler` and torn
    down by :meth:`spider_closed`, which is wired to the crawler's
    ``spider_closed`` signal so the browser is always shut down when the
    crawl ends.
    """

    name = 'jd'
    allowed_domains = ['jd.com']
    start_urls = [
        'https://www.jd.com/',
        'https://miaosha.jd.com/',
    ]

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider, attach a Firefox webdriver, and hook cleanup.

        Returns the spider instance with ``spider.driver`` set and its
        ``spider_closed`` handler connected to ``signals.spider_closed``.
        """
        spider = super(JdSpider, cls).from_crawler(crawler, *args, **kwargs)
        spider.driver = webdriver.Firefox()
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider):
        """Signal handler: shut down the Selenium browser when the crawl ends."""
        # BUG FIX: quit() terminates the whole browser session and its driver
        # process; the original close() only closed the current window and
        # leaked the geckodriver process.
        spider.driver.quit()
        print("==========爬虫结束!")
        # BUG FIX: the original logged `www.spider.name`, an undefined name
        # that raised NameError inside the signal handler.
        spider.logger.info('Spider closed:%s', spider.name)

    def parse(self, response):
        """Default callback for downloaded responses.

        Currently a stub: logs the fetched URL and yields nothing.
        """
        item = {}  # TODO: populate and yield an item; unused for now
        print(response.url)